<?php

/*
 * @author:      Erlend ter Maat
 * @licence      GNU General Public Licence v2
 * 
 * A curl-based web browser for automating web-browsing tasks.
 * Keep it Simple, Safe and Stupid.
 *
 * Flash is not supported; HTML5 works in most cases.
 */

/**
 * Minimal curl-based browser: fetches pages relative to a base url, keeps
 * cookies in a temporary cookie-jar file, records every successful request
 * in a history (newest first) and offers a few helpers to inspect the last
 * fetched page (xpath queries, link extraction).
 */
class WebBrowser {

  // base url without trailing slash; relative urls are appended to it
  protected $base_url;
  // true when no cookie jar should be used at all
  protected $nocookies;
  // path of the temporary cookie-jar file (null when there is none)
  protected $cookie_file;
  // curl handle of the most recent request, null when closed
  protected $curl;
  // body of the most recent request (string), false on failure, null after a download
  protected $page;
  // curl_getinfo() result of the most recent request
  protected $headers;
  // list of visited pages, most recent first
  protected $history;
  // true to echo trace messages
  protected $debug;
  // optional interface/ip handed to CURLOPT_INTERFACE
  protected $alternative_interface;

  /**
   * Echo a trace message when debugging is enabled.
   *
   * @param string $msg
   */
  public function debug($msg) {
    if ($this->debug) {
      echo "$msg\n";
    }
  }

  /**
   * @param string $base_url   url every relative request is resolved against
   * @param bool   $debug      echo trace messages
   * @param bool   $nocookies  do not keep cookies between requests
   */
  public function __construct($base_url, $debug=false, $nocookies=false) {
    // the flag has to be set first, otherwise the trace line below is lost
    $this->debug = $debug;
    $this->debug('Enter: constructor()');
    $this->base_url = rtrim($base_url, '/');
    $this->curl = null;
    $this->history = array();
    $this->nocookies = $nocookies;
    $this->alternative_interface = null;

    $this->reset();
  }

  /**
   * Release the curl handle and remove the temporary cookie file, so
   * instances do not leave cookie jars behind in the temp directory.
   */
  public function __destruct() {
    // close first: curl writes the jar when the handle is released and
    // would otherwise recreate the file right after it was removed
    $this->close();
    $this->removeCookieFile();
  }

  /**
   * Put a visited page at the front of the history.
   *
   * The arguments are taken by reference for signature compatibility only;
   * the values are copied into the history item.
   *
   * @param string $url
   * @param string $param    query string that was appended to the url
   * @param string $page     response body
   * @param array  $headers  curl_getinfo() result
   */
  protected function prepend($url, &$param, &$page, &$headers) {
    $this->debug('Enter: prepend()');
    $item = array(
      'url' => $url,
      'param' => $param,
      'content' => $page,
      'headers' => $headers,
      'datetime' => time()
    );

    array_unshift($this->history, $item);
  }

  /**
   * Send a request to the server and store the response as current page.
   *
   * A GET request is sent unless $post is an array, in which case the
   * request is a POST. Per the PHP manual an array handed to
   * CURLOPT_POSTFIELDS is sent as multipart/form-data.
   *
   * @param string $relative_url  path below the base url, or an absolute http(s) url
   * @param array|null $get       field => value pairs for the query string
   * @param array|null $post      field => value pairs to post
   * @return bool true when a non-empty response body was received
   */
  public function submit($relative_url='/', $get=null, $post=null) {
    $this->debug("Enter: submit($relative_url)");

    // build the query string; names are encoded as well as values
    $param_url = '';
    if (is_array($get) && count($get) > 0) {
      $pairs = array();
      foreach ($get as $field => $value) {
        $pairs[] = urlencode($field) . '=' . urlencode($value);
      }

      // extend an existing query string instead of starting a second one
      $glue = (strpos($relative_url, '?') === false) ? '?' : '&';
      $param_url = $glue . implode('&', $pairs);
    }

    // initialize the request
    if (!$this->init($relative_url . $param_url)) {
      $this->debug('submit():could not initialize curl');
      $this->page = false;
      $this->headers = array();
      return false;
    }

    if (is_array($post)) {
      // change request type and enter values
      curl_setopt($this->curl, CURLOPT_POST, true);
      curl_setopt($this->curl, CURLOPT_POSTFIELDS, $post);

      $this->debug("Submit():post");
    } else {
      // get request otherwise
      curl_setopt($this->curl, CURLOPT_POST, false);
      curl_setopt($this->curl, CURLOPT_HTTPGET, true);

      $this->debug("Submit():get $param_url");
    }

    $this->page = curl_exec($this->curl);
    $this->headers = curl_getinfo($this->curl);

    if ($this->page === false || $this->page === '') {
      $this->debug('submit():no content ' . curl_error($this->curl));
      return false;
    }

    $this->prepend($relative_url, $param_url, $this->page, $this->headers);
    return true;
  }

  /**
   * Download a remote location into a local file.
   *
   * The current page is cleared (set to null); the transfer info stays
   * available through getResultHeaders() / getHeader(). Only a failed
   * transfer is reported as failure - the HTTP status is not inspected.
   * A file that was created before the transfer failed is left in place.
   *
   * @param string $location  path below the base url, or an absolute http(s) url
   * @param string $filename  local file to write (created or truncated)
   * @return string|bool $filename as given on success, FALSE otherwise
   */
  public function download($location, $filename) {
    // mode 'w' creates the file when it does not exist yet
    $fp = fopen($filename, 'w');

    if ($fp === false) {
      $this->debug('Could not open file ' . $filename . ' for writing.');
      return false;
    }

    if (!$this->init($location)) {
      $this->debug('download():could not initialize curl');
      fclose($fp);
      return false;
    }

    // has to be set after init(), which enables CURLOPT_RETURNTRANSFER
    curl_setopt($this->curl, CURLOPT_FILE, $fp);
    $result = curl_exec($this->curl);

    $this->page = null;
    $this->headers = curl_getinfo($this->curl);

    if ($result === false) {
      $this->debug('Download of ' . $location . ' failed: ' . curl_error($this->curl));
    }

    // release the transfer before the stream it writes to, then the stream
    $this->close();
    fclose($fp);

    if ($result === false) {
      return false;
    }

    return $filename;
  }

  /**
   * Get the current page, or parts of it.
   *
   * Without an argument the raw page is returned. With one or more xpath
   * expressions the result is an array keyed by expression, each entry
   * holding the matching nodes serialized as XML strings. Expressions
   * without matches (or when there is no page to parse) yield an empty
   * array.
   *
   * @param string|array $xpath
   * @return mixed
   */
  public function getPage($xpath=null) {
    $this->debug('Enter: getPage()');
    if (!$xpath) {
      return $this->page;
    }

    if (!is_array($xpath)) {
      $xpath = array($xpath);
    }

    $document = $this->loadDocument();
    $selector = ($document === null) ? null : new DOMXpath($document);

    $result = array();

    foreach ($xpath as $eval) {
      $result[$eval] = array();

      if ($selector === null) {
        continue;
      }

      // query() returns false for an expression it cannot evaluate
      $elements = $selector->query($eval);
      if ($elements !== false) {
        foreach ($elements as $domNode) {
          $result[$eval][] = $document->saveXML($domNode);
        }
      }
    }

    return $result;
  }

  /**
   * Parse the current page into a DOM document.
   *
   * Parser complaints about sloppy html are collected by libxml instead of
   * being raised as php warnings.
   *
   * @return DOMDocument|null null when there is no page or it cannot be parsed
   */
  protected function loadDocument() {
    // nothing to parse after a download or a failed request; loadHTML()
    // refuses an empty string on php 8
    if (!is_string($this->page) || $this->page === '') {
      return null;
    }

    $previous = libxml_use_internal_errors(true);

    $document = new DOMDocument();
    $loaded = $document->loadHTML($this->page);

    if (!$previous) {
      // only drop the buffered errors when nobody else is collecting them
      libxml_clear_errors();
    }
    libxml_use_internal_errors($previous);

    return $loaded ? $document : null;
  }

  /**
   * Url the most recent successful request ended up at, as reported by
   * curl.
   *
   * @return string|bool FALSE when nothing was fetched yet
   */
  public function getLocation() {
    $this->debug('Enter: getLocation()');

    if (count($this->history) > 0) {
      $this->debug('location at ' . count($this->history));
      return $this->history[0]['headers']['url'];
    }

    return false;
  }

  /**
   * Find all links (anchors with a href attribute) on the current page.
   * TODO: implement links without a description, to read from alt
   *
   * Each link is an array with the keys 'href', 'title', 'id' and
   * 'content' (the text inside the anchor, '' when it is empty). The href
   * is trimmed and cut off at the first space.
   *
   * @return array
   */
  public function getLinks() {
    $this->debug('Enter: getLinks()');
    $result = array();

    $document = $this->loadDocument();

    // no page, no links
    if ($document === null) {
      return $result;
    }

    $selector = new DOMXpath($document);
    $anchors = $selector->query('//a[@href]');

    if ($anchors === false) {
      return $result;
    }

    foreach ($anchors as $element) {
      // sanitize the href: surrounding whitespace goes, as does everything
      // behind the first space
      $href = trim($element->getAttribute('href'));
      $space = strpos($href, ' ');
      if ($space !== false) {
        $href = substr($href, 0, $space);
      }

      $result[] = array(
        'href' => $href,
        'title' => $element->getAttribute('title'),
        'id' => $element->getAttribute('id'),
        'content' => $element->textContent
      );
    }

    return $result;
  }

  /**
   * Reset the cookie file (equals logout for most apps)
   *
   * The current connection is closed, the old cookie file is removed and,
   * unless cookies are disabled, a fresh one is created in the system temp
   * directory.
   *
   * @return bool FALSE when a new cookie file could not be created
   */
  public function reset() {
    $this->debug('Enter: reset()');

    // an open handle would write its cookies back when it is released,
    // recreating the file that is about to be removed
    $this->close();
    $this->removeCookieFile();

    if ($this->nocookies) {
      return true;
    }

    $this->cookie_file = tempnam(sys_get_temp_dir(), 'connecturl_cookie_');

    $this->debug('reset():file ' . $this->cookie_file);

    return ($this->cookie_file !== false && $this->cookie_file !== '');
  }

  /**
   * Delete the cookie file, if there is one, and forget its name.
   */
  protected function removeCookieFile() {
    if (is_string($this->cookie_file) && $this->cookie_file !== '' && file_exists($this->cookie_file)) {
      unlink($this->cookie_file);
    }

    $this->cookie_file = null;
  }

  /**
   * Close the connection
   *
   * Safe to call when there is no open connection.
   */
  public function close() {
    // before php 8 the handle is a resource that has to be closed; from
    // php 8 on curl_close() has no effect and dropping the reference
    // releases the handle
    if (is_resource($this->curl)) {
      curl_close($this->curl);
    }

    $this->curl = null;
  }

  /**
   * Initialize the connection to the server
   *
   * Any previous connection is closed. Urls starting with http:// or
   * https:// are used as they are, everything else is appended to the
   * base url.
   *
   * @param string $relative_url
   * @return bool FALSE when curl could not be initialized
   */
  protected function init($relative_url) {
    $this->close();

    if (preg_match('/^https?:\/\//i', $relative_url)) {
      $url = $relative_url;
    } else {
      $url = $this->base_url . '/' . ltrim($relative_url, '/');
    }

    $this->debug('Enter: init()');
    $handle = curl_init($url);

    if (!$handle) {
      // keep the property null so close() and later calls stay safe
      $this->curl = null;
      return false;
    }

    $this->curl = $handle;

    if (!$this->nocookies) {
      // read cookies from and write them back to the same file
      curl_setopt($this->curl, CURLOPT_COOKIEFILE, $this->cookie_file);
      curl_setopt($this->curl, CURLOPT_COOKIEJAR, $this->cookie_file);
    }
    curl_setopt($this->curl, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($this->curl, CURLOPT_FOLLOWLOCATION, 1);

    if ($this->alternative_interface <> null) {
      curl_setopt($this->curl, CURLOPT_INTERFACE, $this->alternative_interface);
    }

    return true;
  }

  /**
   * Return the current history raw format
   *
   * @return string print_r() output of the history
   */
  public function dump() {
    $this->debug('Enter: dump()');

    return print_r($this->history, true);
  }

  /**
   * Get raw cookies from file
   *
   * The current connection is closed first, because curl only writes the
   * cookie file when the handle is released. The page and headers of the
   * last request stay available.
   *
   * @return string|bool FALSE when cookies are disabled or the file cannot be read
   */
  public function getCookies() {
    if ($this->nocookies) {
      return false;
    }

    $this->close();

    if (!is_string($this->cookie_file) || !file_exists($this->cookie_file)) {
      return false;
    }

    return file_get_contents($this->cookie_file);
  }

  /**
   * Transfer info (curl_getinfo) of the most recent request.
   *
   * @return mixed null when no request was made yet
   */
  public function getResultHeaders() {
    return $this->headers;
  }

  /**
   * set the ip of the interface to use for the connection. Of course the
   * interface must be set up correctly at the host system.
   *
   * Takes effect with the next request; null switches back to the default.
   *
   * @param string $ip
   */
  public function setAlternativeInterface($ip=null) {
    $this->alternative_interface = $ip;
  }

  /**
   * Get a single entry of the transfer info of the most recent request.
   *
   * @param string $key      curl_getinfo() key, e.g. 'http_code'
   * @param mixed  $default  returned when the key is not available
   * @return mixed
   */
  public function getHeader($key, $default=null) {
    // there is no transfer info before the first request
    if (!is_array($this->headers) || !array_key_exists($key, $this->headers)) {
      return $default;
    }

    return $this->headers[$key];
  }

}
